*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do file prepares the grid-level data set that will be used when processing the cluster data to generate PLs
* It insheets the results from the clustering algorithm
* Merges information from the original grids
* This file processes clusters based on big data establishments with weights from the second half of the alphabet 

* Create the top-level output folder for this do file
* (capture: the error raised when the folder already exists is ignored)
	capture mkdir "$dataoutput/USMetroGridEmp"

* CBSA codes of the metros processed by the loops below
* Sample for weights creation from the second half of the alphabet (overlapping sample of USA MSAs and Global Cities)
* We predict employment for the first half
	local USMETROIDS 12060 12420 14460 16740 16980 ///
		17140 17460 18140 19100 19740 ///
		19820 26420 26900 28140 29820 ///
		30460 31080 33100 33340
		
* Begin loop by employment type
* For each employment type:
*   (1) convert every metro's cluster-algorithm output into a grid-level .dta file
*       enriched with the grid legend, and
*   (2) stack all metro files into one data set per employment type.
	foreach EmpType of numlist 1 { //  3 5
		* Generate workspace for outputs (capture: ignore the error if the folder exists)
		capture mkdir "$dataoutput/USMetroGridEmp/EmpType_`EmpType'"
		local count = 1	// running metro counter, only used in the progress message
		* Begin loop by metro to process metro outputs
			foreach USmid of local USMETROIDS {
				di "...Working on employment type `EmpType' and CBSA `USmid', number `count'"
				* Import data; columns are addressed below by their default names v1, v2, ...
				qui import delimited "$data_USMETROS/CLUSTER OUTPUT/cells_validation_last/gridcells_`USmid'_`EmpType'_output.txt", clear

			* Process variables
				* Columns that are not needed downstream
				qui drop v1 v2 v3 v4 v5
				* Cell corner coordinates: rescale by 180/pi (radians to degrees)
				qui foreach var of varlist v6 v7 v8 v9 {
					replace `var' = `var'*180/_pi
				}
				qui ren  v6 lon_UL
				qui ren  v7 lat_UL
				qui ren  v8 lon_LR
				qui ren  v9 lat_LR
				* NOTE(review): v10 is neither renamed nor dropped, so it stays in the saved data as v10 - confirm this is intended
				qui ren v11 cell_MFG_emp 	// emp_mfg_wholesale
				qui ren v12 cell_NT_emp 	// emp_nontradable_services
				qui ren v13 cell_PS_emp 	// emp_public_services
				qui ren v14 cell_TS_emp 	// emp_tradable_services
				qui ren v15 cell_O_emp 		// emp_others
				qui ren v16 cell_ST_emp 	// cell_emp_search_terms
				qui ren v17 cell_total_emp
				qui ren v19 clusterID
				qui ren v21 cell_id
				* v22 v23: dropping developable here since it will be merged from the grid data later on
				qui drop v18 v20 v22 v23
			* Generate cbsafp ID: first "_"-separated component of cell_id, converted to numeric
				qui split cell_id , p(_)
				capture drop cell_id2
				capture drop cell_id3
				qui destring cell_id1, replace force
				qui ren cell_id1 cbsafp
			* Merge legend features
				qui duplicates drop cell_id, force // need to deal with occasional duplicates
				* cbsafp is listed once in keepusing(); master values are kept for matched cells
				qui merge 1:1 cell_id using "$temp/GridLegend/GRID_`USmid'_final.dta", keepusing(cbsafp square_id metro_id area_g developable lon lat metro_name grid_x grid_y)
				* _merge is spelled out in full so the script does not depend on variable-name abbreviation
				* There are cells outside the CBSA since employment is read from NETS in squares and we are merging from the entire grid universe
				qui drop if _merge == 1
				qui drop _merge
				* Zero employment cells are missing in algorithm output: set their cluster ID and employment to 0
				qui foreach var of varlist clusterID cell_*_emp {
					replace `var' = 0 if `var' == .
				}
			* Save CBSA data
				qui compress // find memory-efficient storage types
				qui save "$dataoutput/USMetroGridEmp/EmpType_`EmpType'/EmpGrid_`USmid'", replace
				local count = `count'+1
			}
		* Loop by metro to process metro outputs ends

		* Append data: start from an empty data set and stack the metro files saved above
		clear
		foreach USmid of local USMETROIDS {
			display "...appending CBSA `USmid', employment type `EmpType' metro-grid data..."
			append using "$dataoutput/USMetroGridEmp/EmpType_`EmpType'/EmpGrid_`USmid'"
		}
		* Save data sets
		display "..finalizing `EmpType' metro-grid data..."
		qui duplicates drop	// remove observations that are identical on all variables
		qui save "$dataoutput/USMetroGridEmp/EmpType_`EmpType'/EmpGrid__all.dta", replace
	}
* Script ends